Data Science


Python

R

  • NumPy [ arrays | matrices ]
  • Pandas [ data analysis ]
  • Matplotlib / Bokeh [ visualization ]
  • Math
    • Linear Algebra
    • Probability and Statistics
    • Calculus

Advanced Computing

- Machine Learning
- Machine Vision
- Data Scientist

In [ ]:

Data Science

Numpy


In [1]:
from array import array

In [2]:
array('i', [1, 2, 3])


Out[2]:
array('i', [1, 2, 3])

In [ ]:


In [3]:
import numpy as np

In [4]:
np.array([1, 5, 6, 9])


Out[4]:
array([1, 5, 6, 9])

In [5]:
arr = np.array([1, 5, 6, 9])

In [6]:
arr.dtype


Out[6]:
dtype('int64')

In [7]:
np.array([1.2, 5.6, 4, 9.0, 7])


Out[7]:
array([ 1.2,  5.6,  4. ,  9. ,  7. ])

In [8]:
np.array([1.2, 5.6, 4, 9.0, 7]).dtype


Out[8]:
dtype('float64')

In [10]:
np.array(['1', 5, 6])


Out[10]:
array(['1', '5', '6'], 
      dtype='<U1')

In [11]:
np.arange(1, 9)


Out[11]:
array([1, 2, 3, 4, 5, 6, 7, 8])

In [12]:
m1 = np.arange(1, 9)

In [13]:
m1


Out[13]:
array([1, 2, 3, 4, 5, 6, 7, 8])

In [14]:
m1.shape


Out[14]:
(8,)

In [16]:
m1.size


Out[16]:
8

In [19]:
m1 * 4


Out[19]:
array([ 4,  8, 12, 16, 20, 24, 28, 32])

In [21]:
m1* 2


Out[21]:
array([ 2,  4,  6,  8, 10, 12, 14, 16])

In [22]:
m1 + (m1 * 2)


Out[22]:
array([ 3,  6,  9, 12, 15, 18, 21, 24])

In [23]:
m1.ndim


Out[23]:
1

In [24]:
# Two nested lists of three numbers each give a 2-D array: 2 rows by 3 columns.
m2 = np.array([[1, 2, 3], [7, 8, 9]])

In [25]:
m2.ndim


Out[25]:
2

In [26]:
m2.size


Out[26]:
6

In [27]:
m2.shape


Out[27]:
(2, 3)

In [28]:
m2


Out[28]:
array([[1, 2, 3],
       [7, 8, 9]])

In [31]:
# Swap rows and columns of m2 (2x3 -> 3x2) using the .T shorthand.
m3 = m2.T

In [32]:
m3


Out[32]:
array([[1, 7],
       [2, 8],
       [3, 9]])

In [33]:
m3.shape


Out[33]:
(3, 2)

In [34]:
np.zeros((2, 3))


Out[34]:
array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [35]:
np.ones((3, 2))


Out[35]:
array([[ 1.,  1.],
       [ 1.,  1.],
       [ 1.,  1.]])

In [36]:
np.diag((3, 4))


Out[36]:
array([[3, 0],
       [0, 4]])

In [39]:
help(np.ones)


Help on function ones in module numpy.core.numeric:

ones(shape, dtype=None, order='C')
    Return a new array of given shape and type, filled with ones.
    
    Parameters
    ----------
    shape : int or sequence of ints
        Shape of the new array, e.g., ``(2, 3)`` or ``2``.
    dtype : data-type, optional
        The desired data-type for the array, e.g., `numpy.int8`.  Default is
        `numpy.float64`.
    order : {'C', 'F'}, optional
        Whether to store multidimensional data in C- or Fortran-contiguous
        (row- or column-wise) order in memory.
    
    Returns
    -------
    out : ndarray
        Array of ones with the given shape, dtype, and order.
    
    See Also
    --------
    zeros, ones_like
    
    Examples
    --------
    >>> np.ones(5)
    array([ 1.,  1.,  1.,  1.,  1.])
    
    >>> np.ones((5,), dtype=np.int)
    array([1, 1, 1, 1, 1])
    
    >>> np.ones((2, 1))
    array([[ 1.],
           [ 1.]])
    
    >>> s = (2,2)
    >>> np.ones(s)
    array([[ 1.,  1.],
           [ 1.,  1.]])


In [40]:
np.ones(5)


Out[40]:
array([ 1.,  1.,  1.,  1.,  1.])

In [41]:
np.diag(np.ones(5))


Out[41]:
array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

In [44]:
np.linspace(0, 1)


Out[44]:
array([ 0.        ,  0.02040816,  0.04081633,  0.06122449,  0.08163265,
        0.10204082,  0.12244898,  0.14285714,  0.16326531,  0.18367347,
        0.20408163,  0.2244898 ,  0.24489796,  0.26530612,  0.28571429,
        0.30612245,  0.32653061,  0.34693878,  0.36734694,  0.3877551 ,
        0.40816327,  0.42857143,  0.44897959,  0.46938776,  0.48979592,
        0.51020408,  0.53061224,  0.55102041,  0.57142857,  0.59183673,
        0.6122449 ,  0.63265306,  0.65306122,  0.67346939,  0.69387755,
        0.71428571,  0.73469388,  0.75510204,  0.7755102 ,  0.79591837,
        0.81632653,  0.83673469,  0.85714286,  0.87755102,  0.89795918,
        0.91836735,  0.93877551,  0.95918367,  0.97959184,  1.        ])

In [87]:
x = np.linspace(1, 10, num=20)

In [88]:
x


Out[88]:
array([  1.        ,   1.47368421,   1.94736842,   2.42105263,
         2.89473684,   3.36842105,   3.84210526,   4.31578947,
         4.78947368,   5.26315789,   5.73684211,   6.21052632,
         6.68421053,   7.15789474,   7.63157895,   8.10526316,
         8.57894737,   9.05263158,   9.52631579,  10.        ])

In [89]:
x[1] - x[0]


Out[89]:
0.47368421052631571

In [45]:
np.linspace(1, 99)


Out[45]:
array([  1.,   3.,   5.,   7.,   9.,  11.,  13.,  15.,  17.,  19.,  21.,
        23.,  25.,  27.,  29.,  31.,  33.,  35.,  37.,  39.,  41.,  43.,
        45.,  47.,  49.,  51.,  53.,  55.,  57.,  59.,  61.,  63.,  65.,
        67.,  69.,  71.,  73.,  75.,  77.,  79.,  81.,  83.,  85.,  87.,
        89.,  91.,  93.,  95.,  97.,  99.])

In [46]:
np.linspace(1, 5, num=20)


Out[46]:
array([ 1.        ,  1.21052632,  1.42105263,  1.63157895,  1.84210526,
        2.05263158,  2.26315789,  2.47368421,  2.68421053,  2.89473684,
        3.10526316,  3.31578947,  3.52631579,  3.73684211,  3.94736842,
        4.15789474,  4.36842105,  4.57894737,  4.78947368,  5.        ])

In [47]:
%matplotlib inline


/home/diwaker/miniconda/envs/pysession/lib/python3.5/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
/home/diwaker/miniconda/envs/pysession/lib/python3.5/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

In [48]:
import matplotlib.pyplot as plt

In [49]:
plt.plot(np.linspace(0, 1))


Out[49]:
[<matplotlib.lines.Line2D at 0x7f4c61306518>]

In [50]:
np.pi


Out[50]:
3.141592653589793

In [52]:
np.sin(np.pi/2.0)


Out[52]:
1.0

In [90]:
np.sin(1.4)


Out[90]:
0.98544972998846014

In [93]:
np.sin(np.array([1.4, 2, 3]))


Out[93]:
array([ 0.98544973,  0.90929743,  0.14112001])

In [53]:
# 50 evenly spaced sample points from -pi to pi (both endpoints appear in the
# output below); used as the x values for the trig plots that follow.
x_range = np.linspace(-np.pi, np.pi, 50)

In [54]:
x_range


Out[54]:
array([-3.14159265, -3.01336438, -2.88513611, -2.75690784, -2.62867957,
       -2.5004513 , -2.37222302, -2.24399475, -2.11576648, -1.98753821,
       -1.85930994, -1.73108167, -1.60285339, -1.47462512, -1.34639685,
       -1.21816858, -1.08994031, -0.96171204, -0.83348377, -0.70525549,
       -0.57702722, -0.44879895, -0.32057068, -0.19234241, -0.06411414,
        0.06411414,  0.19234241,  0.32057068,  0.44879895,  0.57702722,
        0.70525549,  0.83348377,  0.96171204,  1.08994031,  1.21816858,
        1.34639685,  1.47462512,  1.60285339,  1.73108167,  1.85930994,
        1.98753821,  2.11576648,  2.24399475,  2.37222302,  2.5004513 ,
        2.62867957,  2.75690784,  2.88513611,  3.01336438,  3.14159265])

In [56]:
np.sin(x_range)


Out[56]:
array([ -1.22464680e-16,  -1.27877162e-01,  -2.53654584e-01,
        -3.75267005e-01,  -4.90717552e-01,  -5.98110530e-01,
        -6.95682551e-01,  -7.81831482e-01,  -8.55142763e-01,
        -9.14412623e-01,  -9.58667853e-01,  -9.87181783e-01,
        -9.99486216e-01,  -9.95379113e-01,  -9.74927912e-01,
        -9.38468422e-01,  -8.86599306e-01,  -8.20172255e-01,
        -7.40277997e-01,  -6.48228395e-01,  -5.45534901e-01,
        -4.33883739e-01,  -3.15108218e-01,  -1.91158629e-01,
        -6.40702200e-02,   6.40702200e-02,   1.91158629e-01,
         3.15108218e-01,   4.33883739e-01,   5.45534901e-01,
         6.48228395e-01,   7.40277997e-01,   8.20172255e-01,
         8.86599306e-01,   9.38468422e-01,   9.74927912e-01,
         9.95379113e-01,   9.99486216e-01,   9.87181783e-01,
         9.58667853e-01,   9.14412623e-01,   8.55142763e-01,
         7.81831482e-01,   6.95682551e-01,   5.98110530e-01,
         4.90717552e-01,   3.75267005e-01,   2.53654584e-01,
         1.27877162e-01,   1.22464680e-16])

In [55]:
plt.plot(x_range, np.sin(x_range))


Out[55]:
[<matplotlib.lines.Line2D at 0x7f4c610836d8>]

In [57]:
plt.plot(x_range, np.tan(x_range))


Out[57]:
[<matplotlib.lines.Line2D at 0x7f4c6104c438>]

In [ ]:


In [61]:
np.random.rand(3, 2)


Out[61]:
array([[  8.80154703e-01,   6.86949977e-04],
       [  3.90160791e-02,   1.31479509e-01],
       [  7.19499261e-02,   8.13752618e-01]])

In [63]:
np.random.randint(1, 99)


Out[63]:
15

In [64]:
np.random.randint(1, 99)


Out[64]:
31

In [66]:
np.random.randint(1, 99, size=8)


Out[66]:
array([73, 64, 89, 72, 90, 68, 29, 69])

In [67]:
# Sample of 12 random integers used for the summary statistics below.
# NOTE(review): no random seed is set in the visible cells, so re-running will
# produce different numbers than the recorded outputs.
m5 = np.random.randint(1, 99, size=12)

In [68]:
m5


Out[68]:
array([ 6,  8, 40,  5, 14, 81, 96, 43, 39, 64,  6, 98])

In [69]:
m5.mean()


Out[69]:
41.666666666666664

In [70]:
np.median(m5)


Out[70]:
39.5

In [71]:
m5.max()


Out[71]:
98

In [72]:
m5.min()


Out[72]:
5

In [73]:
m5.std()


Out[73]:
34.120700787384514

In [74]:
m5.sum()


Out[74]:
500

In [ ]:

pandas

  • Series
  • DataFrame

In [75]:
import pandas as pd

In [76]:
countries = ["Nepal", "India", "Pakistan", "Bhutan"]
# NOTE(review): these values look like international dialling codes rather than
# postal/zip codes, and 233 / 987 do not appear to match Pakistan (+92) or
# Bhutan (+975) — confirm the intended data before relying on it.
zip_codes = [977, 91, 233, 987]

In [79]:
# Pair each country with the code at the same position, as a list of tuples.
dataset = [(country, code) for country, code in zip(countries, zip_codes)]

In [80]:
dataset


Out[80]:
[('Nepal', 977), ('India', 91), ('Pakistan', 233), ('Bhutan', 987)]

In [81]:
df = pd.DataFrame(data=dataset, columns=["Country", "Zip Code"])

In [82]:
df


Out[82]:
Country Zip Code
0 Nepal 977
1 India 91
2 Pakistan 233
3 Bhutan 987

In [ ]:


In [97]:
# Load the indicator table from a local CSV. skiprows=3 skips the first three
# lines of the file before parsing (presumably non-tabular preamble — verify
# against the raw file).
# NOTE(review): the frame shown below ends with an "Unnamed: 60" column that is
# NaN in every displayed row — possibly a trailing delimiter in the file; consider
# dropping it after confirming.
dframe = pd.read_csv('API_NPL_DS2_en_csv_v2.csv', skiprows=3)

In [99]:
dframe.head()


Out[99]:
Country Name Country Code Indicator Name Indicator Code 1960 1961 1962 1963 1964 1965 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 Unnamed: 60
0 Nepal NPL Agricultural machinery, tractors AG.AGR.TRAC.NO NaN 180.000000 190.000000 200.000000 215.000000 225.000000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 Nepal NPL Fertilizer consumption (% of fertilizer produc... AG.CON.FERT.PT.ZS NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 Nepal NPL Fertilizer consumption (kilograms per hectare ... AG.CON.FERT.ZS NaN NaN NaN NaN NaN NaN ... 1.608929 1.364865 18.202727 25.086697 34.883710 44.069877 57.728477 NaN NaN NaN
3 Nepal NPL Agricultural land (sq. km) AG.LND.AGRI.K2 NaN 35530.000000 35530.000000 35630.000000 35530.000000 35530.000000 ... 41660.000000 41520.000000 41400.000000 41260.000000 41266.000000 41210.000000 41210.000000 NaN NaN NaN
4 Nepal NPL Agricultural land (% of land area) AG.LND.AGRI.ZS NaN 24.846154 24.846154 24.916084 24.846154 24.846154 ... 29.061737 28.964074 28.880363 28.782700 28.786885 28.747820 28.747820 NaN NaN NaN

5 rows × 61 columns


In [100]:
dframe.tail()


Out[100]:
Country Name Country Code Indicator Name Indicator Code 1960 1961 1962 1963 1964 1965 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 Unnamed: 60
1415 Nepal NPL Mobile account (% age 15+) [w2] WP15163_4.1 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.336039 NaN NaN
1416 Nepal NPL Mobile account, male (% age 15+) [w2] WP15163_4.2 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.397731 NaN NaN
1417 Nepal NPL Mobile account, female (% age 15+) [w2] WP15163_4.3 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.282541 NaN NaN
1418 Nepal NPL Mobile account, income, poorest 40% (% ages 15... WP15163_4.8 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.299864 NaN NaN
1419 Nepal NPL Mobile account, income, richest 60% (% ages 15... WP15163_4.9 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN 0.362069 NaN NaN

5 rows × 61 columns


In [101]:
dframe.dtypes


Out[101]:
Country Name       object
Country Code       object
Indicator Name     object
Indicator Code     object
1960              float64
1961              float64
1962              float64
1963              float64
1964              float64
1965              float64
1966              float64
1967              float64
1968              float64
1969              float64
1970              float64
1971              float64
1972              float64
1973              float64
1974              float64
1975              float64
1976              float64
1977              float64
1978              float64
1979              float64
1980              float64
1981              float64
1982              float64
1983              float64
1984              float64
1985              float64
                   ...   
1987              float64
1988              float64
1989              float64
1990              float64
1991              float64
1992              float64
1993              float64
1994              float64
1995              float64
1996              float64
1997              float64
1998              float64
1999              float64
2000              float64
2001              float64
2002              float64
2003              float64
2004              float64
2005              float64
2006              float64
2007              float64
2008              float64
2009              float64
2010              float64
2011              float64
2012              float64
2013              float64
2014              float64
2015              float64
Unnamed: 60       float64
dtype: object

In [111]:
dframe["2014"]


Out[111]:
0                NaN
1                NaN
2                NaN
3                NaN
4                NaN
5                NaN
6                NaN
7                NaN
8       3.480052e+06
9                NaN
10               NaN
11               NaN
12               NaN
13               NaN
14               NaN
15      3.636000e+04
16      2.536449e+01
17               NaN
18      1.500000e+03
19      1.433500e+05
20               NaN
21               NaN
22               NaN
23      9.562680e+06
24               NaN
25               NaN
26               NaN
27      1.471800e+05
28      2.747900e+03
29               NaN
            ...     
1390             NaN
1391             NaN
1392    1.064697e-01
1393    6.209662e+01
1394             NaN
1395    1.036575e+01
1396    9.173636e+08
1397    6.209662e+01
1398    1.105666e+02
1399    5.263940e+01
1400    1.077983e+09
1401    1.058351e+06
1402    1.731225e-01
1403    2.726186e+00
1404    4.430040e+01
1405             NaN
1406    8.320000e+04
1407             NaN
1408             NaN
1409             NaN
1410    3.380135e+01
1411    3.672448e+01
1412    3.126650e+01
1413    2.372530e+01
1414    4.105188e+01
1415    3.360386e-01
1416    3.977310e-01
1417    2.825407e-01
1418    2.998639e-01
1419    3.620692e-01
Name: 2014, dtype: float64

In [108]:
dframe[["Indicator Name", "2014"]].head()


Out[108]:
Indicator Name 2014
0 Agricultural machinery, tractors NaN
1 Fertilizer consumption (% of fertilizer produc... NaN
2 Fertilizer consumption (kilograms per hectare ... NaN
3 Agricultural land (sq. km) NaN
4 Agricultural land (% of land area) NaN

In [110]:
dframe[dframe["Indicator Name"] == "Agricultural land (sq. km)"]


Out[110]:
Country Name Country Code Indicator Name Indicator Code 1960 1961 1962 1963 1964 1965 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 Unnamed: 60
3 Nepal NPL Agricultural land (sq. km) AG.LND.AGRI.K2 NaN 35530.0 35530.0 35630.0 35530.0 35530.0 ... 41660.0 41520.0 41400.0 41260.0 41266.0 41210.0 41210.0 NaN NaN NaN

1 rows × 61 columns


In [112]:
# Keep only the row(s) whose indicator name is the agricultural-land series.
agri = dframe.loc[dframe["Indicator Name"].eq("Agricultural land (sq. km)")]

In [114]:
agri.plot(kind='bar')


Out[114]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4c5d753390>

In [129]:
agri.loc[0:, "2000":"2013"]


Out[129]:
2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
3 42491.0 42610.0 42410.0 42270.0 42180.0 42020.0 41850.0 41660.0 41520.0 41400.0 41260.0 41266.0 41210.0 41210.0

In [133]:
agri.loc[0:, "2000":"2013"].plot(kind='bar')


Out[133]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4c5df9b048>

In [138]:
# Renumber the selected row(s) from 0. drop=True discards the old row label
# instead of inserting it as a new column, so this cell gives the same result
# however many times it is run (a plain reset_index() would add another index
# column on every re-run).
agri = agri.reset_index(drop=True)

In [139]:
agri.loc[0:, "2000":"2013"]


Out[139]:
2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
0 42491.0 42610.0 42410.0 42270.0 42180.0 42020.0 41850.0 41660.0 41520.0 41400.0 41260.0 41266.0 41210.0 41210.0

In [140]:
agri.loc[0:, "2000":"2013"].transpose()


Out[140]:
0
2000 42491.0
2001 42610.0
2002 42410.0
2003 42270.0
2004 42180.0
2005 42020.0
2006 41850.0
2007 41660.0
2008 41520.0
2009 41400.0
2010 41260.0
2011 41266.0
2012 41210.0
2013 41210.0

In [141]:
agri.loc[0:, "2000":"2013"].transpose().plot(kind='bar')


Out[141]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4c5dcc2f98>

In [142]:
# Years 2000-2013 become the rows (one column per original row).
# ":" selects every row regardless of how the index is labelled; a label slice
# such as "0:" only returns all rows when the index labels happen to allow it.
agri_t = agri.loc[:, "2000":"2013"].transpose()

In [143]:
agri_t


Out[143]:
0
2000 42491.0
2001 42610.0
2002 42410.0
2003 42270.0
2004 42180.0
2005 42020.0
2006 41850.0
2007 41660.0
2008 41520.0
2009 41400.0
2010 41260.0
2011 41266.0
2012 41210.0
2013 41210.0

In [ ]:


In [ ]:


In [144]:
agri_t.columns = ["Agricultural land (sq. km)"]

In [145]:
agri_t


Out[145]:
Agricultural land (sq. km)
2000 42491.0
2001 42610.0
2002 42410.0
2003 42270.0
2004 42180.0
2005 42020.0
2006 41850.0
2007 41660.0
2008 41520.0
2009 41400.0
2010 41260.0
2011 41266.0
2012 41210.0
2013 41210.0

In [146]:
agri_t.plot(kind="bar")


Out[146]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4c5dc87e10>

In [147]:
agri_t.plot()


Out[147]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4c5dccb518>

In [155]:
# Rows whose indicator name mentions "of goods and services".
# regex=False treats the text as a literal substring; na=False makes rows with a
# missing indicator name count as non-matches instead of putting NaN in the mask.
dframe[
    dframe["Indicator Name"].str.contains(
        'of goods and services', regex=False, na=False
    )
]


Out[155]:
Country Name Country Code Indicator Name Indicator Code 1960 1961 1962 1963 1964 1965 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 Unnamed: 60
32 Nepal NPL Imports of goods and services (BoP, current US$) BM.GSR.GNFS.CD NaN NaN NaN NaN NaN NaN ... 3.655163e+09 4.371089e+09 5.101054e+09 5.878849e+09 6.447236e+09 6.847432e+09 7.527572e+09 8.601249e+09 NaN NaN
62 Nepal NPL Exports of goods and services (BoP, current US$) BX.GSR.GNFS.CD NaN NaN NaN NaN NaN NaN ... 1.436265e+09 1.710236e+09 1.541847e+09 1.571532e+09 1.862435e+09 1.929205e+09 2.187598e+09 2.363099e+09 NaN NaN
695 Nepal NPL Exports of goods and services (current US$) NE.EXP.GNFS.CD NaN NaN NaN NaN NaN 5.709411e+07 ... 1.327427e+09 1.602783e+09 1.596506e+09 1.533460e+09 1.684070e+09 1.899081e+09 2.059910e+09 2.301359e+09 NaN NaN
696 Nepal NPL Exports of goods and services (current LCU) NE.EXP.GNFS.CN NaN NaN NaN NaN NaN 4.350000e+08 ... 9.356700e+10 1.042070e+11 1.227370e+11 1.142980e+11 1.217140e+11 1.538630e+11 1.811810e+11 2.260220e+11 NaN NaN
697 Nepal NPL Exports of goods and services (constant 2005 US$) NE.EXP.GNFS.KD NaN NaN NaN NaN NaN NaN ... 1.159294e+09 1.167722e+09 1.212941e+09 1.086324e+09 1.063356e+09 1.083739e+09 1.195711e+09 1.478668e+09 NaN NaN
698 Nepal NPL Exports of goods and services (annual % growth) NE.EXP.GNFS.KD.ZG NaN NaN NaN NaN NaN NaN ... -9.448658e-01 7.270102e-01 3.872437e+00 -1.043884e+01 -2.114313e+00 1.916861e+00 1.033204e+01 2.366431e+01 NaN NaN
699 Nepal NPL Exports of goods and services (constant LCU) NE.EXP.GNFS.KN NaN NaN NaN NaN NaN NaN ... 7.757800e+10 7.814200e+10 8.116800e+10 7.269500e+10 7.115800e+10 7.252200e+10 8.001500e+10 9.895000e+10 NaN NaN
700 Nepal NPL Exports of goods and services (% of GDP) NE.EXP.GNFS.ZS NaN NaN NaN NaN NaN 7.765084e+00 ... 1.285566e+01 1.277582e+01 1.241935e+01 9.582536e+00 8.904030e+00 1.007389e+01 1.068908e+01 1.164087e+01 NaN NaN
718 Nepal NPL Imports of goods and services (current US$) NE.IMP.GNFS.CD NaN NaN NaN NaN NaN 1.023756e+08 ... 3.275659e+09 4.172661e+09 4.455550e+09 5.825346e+09 6.227148e+09 6.331152e+09 7.218387e+09 8.151231e+09 NaN NaN
719 Nepal NPL Imports of goods and services (current LCU) NE.IMP.GNFS.CN NaN NaN NaN NaN NaN 7.800000e+08 ... 2.308930e+11 2.712910e+11 3.425360e+11 4.341980e+11 4.500590e+11 5.129480e+11 6.348990e+11 8.005520e+11 NaN NaN
720 Nepal NPL Imports of goods and services (constant 2005 US$) NE.IMP.GNFS.KD NaN NaN NaN NaN NaN NaN ... 2.626664e+09 2.841837e+09 3.200835e+09 4.105320e+09 3.913013e+09 4.045653e+09 4.620121e+09 5.451535e+09 NaN NaN
721 Nepal NPL Imports of goods and services (annual % growth) NE.IMP.GNFS.KD.ZG NaN NaN NaN NaN NaN NaN ... 2.945703e+00 8.191871e+00 1.263257e+01 2.825780e+01 -4.684334e+00 3.389708e+00 1.419965e+01 1.799549e+01 NaN NaN
722 Nepal NPL Imports of goods and services (constant LCU) NE.IMP.GNFS.KN NaN NaN NaN NaN NaN NaN ... 1.585230e+11 1.715090e+11 1.931750e+11 2.477620e+11 2.361560e+11 2.441610e+11 2.788310e+11 3.290080e+11 NaN NaN
723 Nepal NPL Imports of goods and services (% of GDP) NE.IMP.GNFS.ZS NaN NaN NaN NaN NaN 1.392360e+01 ... 3.172361e+01 3.326039e+01 3.466009e+01 3.640237e+01 3.292422e+01 3.358431e+01 3.745693e+01 4.123105e+01 NaN NaN

14 rows × 61 columns


In [173]:
# Pick the two balance-of-payments series (imports and exports of goods and
# services, current US$) by their indicator codes.
import_export = dframe.loc[
    dframe["Indicator Code"].isin(["BM.GSR.GNFS.CD", "BX.GSR.GNFS.CD"])
]

In [174]:
import_export


Out[174]:
Country Name Country Code Indicator Name Indicator Code 1960 1961 1962 1963 1964 1965 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 Unnamed: 60
32 Nepal NPL Imports of goods and services (BoP, current US$) BM.GSR.GNFS.CD NaN NaN NaN NaN NaN NaN ... 3.655163e+09 4.371089e+09 5.101054e+09 5.878849e+09 6.447236e+09 6.847432e+09 7.527572e+09 8.601249e+09 NaN NaN
62 Nepal NPL Exports of goods and services (BoP, current US$) BX.GSR.GNFS.CD NaN NaN NaN NaN NaN NaN ... 1.436265e+09 1.710236e+09 1.541847e+09 1.571532e+09 1.862435e+09 1.929205e+09 2.187598e+09 2.363099e+09 NaN NaN

2 rows × 61 columns


In [175]:
# Keep only the 2007-2014 year columns for every selected row.
# ":" selects all rows unconditionally; a label slice such as "0:" only does so
# when the index labels happen to allow it.
import_export = import_export.loc[:, "2007":"2014"]

In [176]:
import_export


Out[176]:
2007 2008 2009 2010 2011 2012 2013 2014
32 3.655163e+09 4.371089e+09 5.101054e+09 5.878849e+09 6.447236e+09 6.847432e+09 7.527572e+09 8.601249e+09
62 1.436265e+09 1.710236e+09 1.541847e+09 1.571532e+09 1.862435e+09 1.929205e+09 2.187598e+09 2.363099e+09

In [177]:
import_export = import_export.transpose()

In [178]:
import_export


Out[178]:
32 62
2007 3.655163e+09 1.436265e+09
2008 4.371089e+09 1.710236e+09
2009 5.101054e+09 1.541847e+09
2010 5.878849e+09 1.571532e+09
2011 6.447236e+09 1.862435e+09
2012 6.847432e+09 1.929205e+09
2013 7.527572e+09 2.187598e+09
2014 8.601249e+09 2.363099e+09

In [179]:
# Replace the numeric row labels (32, 62) that became column names after the
# transpose with readable names. The order matters: column 32 is the imports
# series and column 62 the exports series, as shown in the earlier output.
import_export.columns = ["Imports of goods and services (BoP, current US$)",
                        "Exports of goods and services (BoP, current US$)"]

In [180]:
import_export


Out[180]:
Imports of goods and services (BoP, current US$) Exports of goods and services (BoP, current US$)
2007 3.655163e+09 1.436265e+09
2008 4.371089e+09 1.710236e+09
2009 5.101054e+09 1.541847e+09
2010 5.878849e+09 1.571532e+09
2011 6.447236e+09 1.862435e+09
2012 6.847432e+09 1.929205e+09
2013 7.527572e+09 2.187598e+09
2014 8.601249e+09 2.363099e+09

In [181]:
import_export.plot()


Out[181]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4c5d68d828>

In [184]:
# Show the frame with every column passed through pd.to_numeric; values that
# cannot be parsed become NaN. The result is displayed only, not stored.
import_export.apply(pd.to_numeric, errors='coerce')


Out[184]:
Imports of goods and services (BoP, current US$) Exports of goods and services (BoP, current US$)
2007 3.655163e+09 1.436265e+09
2008 4.371089e+09 1.710236e+09
2009 5.101054e+09 1.541847e+09
2010 5.878849e+09 1.571532e+09
2011 6.447236e+09 1.862435e+09
2012 6.847432e+09 1.929205e+09
2013 7.527572e+09 2.187598e+09
2014 8.601249e+09 2.363099e+09

In [ ]:


In [185]:
names = ['Bob', 'Jessica', 'Hari', 'John', 'Rajesh', 'Seldon']

In [207]:
names[np.random.randint(0, len(names))]


Out[207]:
'Seldon'

In [208]:
# Build 99 entries, each chosen through a freshly drawn random index into `names`.
random_names = [
    names[np.random.randint(0, len(names))]
    for _ in range(99)
]

In [210]:
len(random_names)


Out[210]:
99

In [211]:
# 99 independently drawn random ages, one np.random.randint(10, 78) call each.
random_ages = [
    np.random.randint(10, 78)
    for _ in range(99)
]

In [213]:
len(random_ages)


Out[213]:
99

In [215]:
age_distrib = pd.DataFrame(list(zip(random_names, random_ages)),
                          columns=["Name", "Age"])

In [216]:
age_distrib.head()


Out[216]:
Name Age
0 Bob 61
1 Rajesh 60
2 Seldon 75
3 Bob 58
4 John 23

In [225]:
# Count the rows for "Bob". The filter is computed here rather than read from the
# `bob` variable, which is only assigned in a later cell — so this cell also works
# when the notebook is run top to bottom on a fresh kernel.
age_distrib[age_distrib["Name"] == "Bob"].count()


Out[225]:
Name    13
Age     13
dtype: int64

In [229]:
# All rows of age_distrib whose Name is "Bob"; the trailing expression displays them.
bob = age_distrib.loc[age_distrib["Name"].eq("Bob")]
bob


Out[229]:
Name Age
0 Bob 61
3 Bob 58
11 Bob 13
13 Bob 32
24 Bob 29
26 Bob 57
47 Bob 33
50 Bob 44
52 Bob 75
53 Bob 50
56 Bob 66
61 Bob 61
65 Bob 14

In [230]:
# Rebuild `bob` from age_distrib and move the original row labels into an "index"
# column (used as the x axis of the scatter plot below). Deriving it from
# age_distrib rather than from `bob` itself keeps the cell idempotent: re-running
# it does not stack up extra index columns.
bob = age_distrib[age_distrib["Name"] == "Bob"].reset_index()

In [231]:
bob


Out[231]:
index Name Age
0 0 Bob 61
1 3 Bob 58
2 11 Bob 13
3 13 Bob 32
4 24 Bob 29
5 26 Bob 57
6 47 Bob 33
7 50 Bob 44
8 52 Bob 75
9 53 Bob 50
10 56 Bob 66
11 61 Bob 61
12 65 Bob 14

In [235]:
bob.plot(kind="scatter", x='index', y='Age')


Out[235]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4c5d380c50>

In [238]:
np.unique(age_distrib["Name"], return_inverse=True)


Out[238]:
(array(['Bob', 'Hari', 'Jessica', 'John', 'Rajesh', 'Seldon'], dtype=object),
 array([0, 4, 5, 0, 3, 3, 3, 5, 5, 4, 3, 0, 1, 0, 5, 5, 5, 5, 2, 4, 2, 3, 2,
        4, 0, 2, 0, 3, 5, 4, 4, 1, 3, 5, 1, 1, 5, 4, 2, 4, 1, 4, 2, 3, 1, 2,
        2, 0, 5, 2, 0, 3, 0, 0, 2, 1, 0, 5, 1, 4, 4, 0, 4, 5, 1, 0, 5, 2, 1,
        5, 1, 2, 3, 4, 3, 1, 1, 1, 1, 5, 3, 2, 3, 1, 3, 4, 4, 4, 3, 5, 3, 2,
        2, 3, 2, 1, 1, 4, 1]))

In [239]:
age_distrib["Name"]


Out[239]:
0         Bob
1      Rajesh
2      Seldon
3         Bob
4        John
5        John
6        John
7      Seldon
8      Seldon
9      Rajesh
10       John
11        Bob
12       Hari
13        Bob
14     Seldon
15     Seldon
16     Seldon
17     Seldon
18    Jessica
19     Rajesh
20    Jessica
21       John
22    Jessica
23     Rajesh
24        Bob
25    Jessica
26        Bob
27       John
28     Seldon
29     Rajesh
       ...   
69     Seldon
70       Hari
71    Jessica
72       John
73     Rajesh
74       John
75       Hari
76       Hari
77       Hari
78       Hari
79     Seldon
80       John
81    Jessica
82       John
83       Hari
84       John
85     Rajesh
86     Rajesh
87     Rajesh
88       John
89     Seldon
90       John
91    Jessica
92    Jessica
93       John
94    Jessica
95       Hari
96       Hari
97     Rajesh
98       Hari
Name: Name, dtype: object

In [241]:
np.unique(age_distrib["Name"], return_inverse=True)


Out[241]:
(array(['Bob', 'Hari', 'Jessica', 'John', 'Rajesh', 'Seldon'], dtype=object),
 array([0, 4, 5, 0, 3, 3, 3, 5, 5, 4, 3, 0, 1, 0, 5, 5, 5, 5, 2, 4, 2, 3, 2,
        4, 0, 2, 0, 3, 5, 4, 4, 1, 3, 5, 1, 1, 5, 4, 2, 4, 1, 4, 2, 3, 1, 2,
        2, 0, 5, 2, 0, 3, 0, 0, 2, 1, 0, 5, 1, 4, 4, 0, 4, 5, 1, 0, 5, 2, 1,
        5, 1, 2, 3, 4, 3, 1, 1, 1, 1, 5, 3, 2, 3, 1, 3, 4, 4, 4, 3, 5, 3, 2,
        2, 3, 2, 1, 1, 4, 1]))

In [271]:
# np.unique with return_inverse=True gives the distinct names (alphabetical in the
# output above) plus, for every row, the position of that row's name within the
# distinct-names array (e.g. row 0 "Bob" -> 0, row 1 "Rajesh" -> 4).
unique_names, x_values = np.unique(age_distrib["Name"], 
                                   return_inverse=True)

x_values


In [243]:
unique_names


Out[243]:
array(['Bob', 'Hari', 'Jessica', 'John', 'Rajesh', 'Seldon'], dtype=object)

In [274]:
# Shift the 0-based name codes to 1-based. The codes are recomputed from
# age_distrib here instead of incrementing the existing x_values, so re-running
# this cell cannot push the values up by one each time.
x_values = np.unique(age_distrib["Name"], return_inverse=True)[1] + 1

In [275]:
age_distrib["Values"] = x_values

In [276]:
age_distrib


Out[276]:
Name Age Values
0 Bob 61 1
1 Rajesh 60 5
2 Seldon 75 6
3 Bob 58 1
4 John 23 4
5 John 64 4
6 John 61 4
7 Seldon 48 6
8 Seldon 73 6
9 Rajesh 57 5
10 John 47 4
11 Bob 13 1
12 Hari 62 2
13 Bob 32 1
14 Seldon 76 6
15 Seldon 76 6
16 Seldon 58 6
17 Seldon 72 6
18 Jessica 40 3
19 Rajesh 60 5
20 Jessica 38 3
21 John 76 4
22 Jessica 20 3
23 Rajesh 16 5
24 Bob 29 1
25 Jessica 60 3
26 Bob 57 1
27 John 42 4
28 Seldon 37 6
29 Rajesh 43 5
... ... ... ...
69 Seldon 40 6
70 Hari 37 2
71 Jessica 16 3
72 John 25 4
73 Rajesh 55 5
74 John 57 4
75 Hari 25 2
76 Hari 25 2
77 Hari 55 2
78 Hari 28 2
79 Seldon 36 6
80 John 21 4
81 Jessica 52 3
82 John 43 4
83 Hari 29 2
84 John 36 4
85 Rajesh 34 5
86 Rajesh 58 5
87 Rajesh 59 5
88 John 15 4
89 Seldon 40 6
90 John 34 4
91 Jessica 38 3
92 Jessica 68 3
93 John 24 4
94 Jessica 36 3
95 Hari 67 2
96 Hari 63 2
97 Rajesh 71 5
98 Hari 66 2

99 rows × 3 columns


In [247]:
# Distinct ages and, for every row, the position of that row's age within the
# distinct-ages array.
# NOTE(review): neither unique_ages nor y_values is used in the visible cells —
# confirm whether this cell is still needed.
unique_ages, y_values = np.unique(age_distrib["Age"], 
                                  return_inverse=True)

In [251]:
len(x_values)


Out[251]:
99

In [277]:
age_distrib.plot(kind='scatter', x='Values', y='Age')


Out[277]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4c57412630>

In [284]:
# Distinct names with an empty label prepended, so that the label at position k
# matches the 1-based code k stored in the "Values" column. The array is rebuilt
# from age_distrib rather than inserted into itself, so re-running this cell does
# not keep adding blank entries.
unique_names = np.insert(np.unique(age_distrib["Name"]), 0, '')

In [285]:
unique_names


Out[285]:
array(['', 'Bob', 'Hari', 'Jessica', 'John', 'Rajesh', 'Seldon'], dtype=object)

In [286]:
# Scatter of age against the numeric name code, with names as x tick labels.
ax = age_distrib.plot(kind='scatter', x='Values', y='Age',
                     color='cyan')
# Pin the tick positions to 0..len(unique_names)-1 before labelling them, so each
# label lands on its own code no matter which ticks matplotlib picks by default
# (position 0 carries the blank label, positions 1.. carry the names).
ax.set_xticks(list(range(len(unique_names))))
ax.set_xticklabels(unique_names)
ax


Out[286]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4c572dd438>

In [304]:
age_distrib[["Name", "Age"]].head()


Out[304]:
Name Age
0 Bob 61
1 Rajesh 60
2 Seldon 75
3 Bob 58
4 John 23

In [307]:
age_distrib[["Name", "Age"]].groupby('Name')


Out[307]:
<pandas.core.groupby.DataFrameGroupBy object at 0x7f4c5736a208>

In [301]:
# Number of rows per name: group the Name/Age columns by Name and count the
# non-null Age entries in each group.
people_count = (
    age_distrib[["Name", "Age"]]
    .groupby('Name')
    .count()
)

In [302]:
people_count


Out[302]:
Age
Name
Bob 13
Hari 19
Jessica 16
John 17
Rajesh 17
Seldon 17

In [303]:
people_count.plot(kind='pie', y='Age')


Out[303]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4c575f5748>